# Compute counts of neighbors for all homes in the sample
pacman::p_load(tidyverse, fst, scales)

# Setup ----
rm(list = ls())
source("code/globals.R")

# Variables specifying which homes to include/exclude
regimestoinclude <- c("sra", "lraYY", "lraNY", "lraNN", "lraYN") 
hometypestodrop <- c("RR104", "RR105", "RR106") # Townhomes, cluster homes, condominiums (goal: no attached structures)

# Specify distance bins
binwidth <- 10
binleftends <- seq(0, 90, binwidth)
binbreaks <- c(binleftends, 100)

# Load list of parcels, filter to the homes we want to compute neighbors for ----

# Load all parcel-incident data (includes all parcels in a zip code-year there was a wildfire in that zip)
all_parcels <- readRDS(file.path(WORKING, "FullRegDataDuplicatedControls.RDS")) %>%
  select(ImportParcelID, BuildingOrImprovementNumber, 
         UNIT_AND_NUMBER = incidentid, # Use old name for matching in this script - keeps us consistent with mapping data
         combinedyear, PropertyLandUseStndCode, PropertyHouseNumber, PropertyStreetName)
# Note: ImportParcelID, BuildingOrImprovementNumber, and UNIT_AND_NUMBER are a unique key in this table

# Load code regime and merge to home info
regime_info <- read_fst(file.path(WORKING, "sra.fst"),
                        columns = c("ImportParcelID", "regime")) %>%
  distinct(ImportParcelID, .keep_all = TRUE) # Simplify case of 1 home two incidents, 1 home two buildings, etc.

parcel_info <- all_parcels %>% left_join(regime_info, by = c("ImportParcelID"))

# Label homes as pre or post-code 1 (1998-2007) or 2 (2008-2016)
parcel_info <- parcel_info %>%
  mutate(tocode = case_when(
    regime %in% c("lraNN", "otherstates", "fra") ~ "pre",
    regime %in% c("sra", "lraNY", "lraYN", "lraYY") & combinedyear < 1998 ~ "pre",
    regime %in% c("sra", "lraNY", "lraYN", "lraYY") & combinedyear >= 1998 ~ "post",
    is.na(combinedyear) ~ "pre" # Important - homes with unknown year built are assumed to be pre-codes for neighbor analysis.
  ))


# Load neighbor mapping ----
dist_mapping <- read_fst(file.path(WORKING, "neighbor-mapping.fst"),
                             columns = c(
                               "ImportParcelID", "BuildingOrImprovementNumber",
                               "ImportParcelID_nbr", "BuildingOrImprovementNumber_nbr",
                               "UNIT_AND_NUMBER", "centroid_dist", "wall_dist"
                             ), as.data.table = T
)

# Merge in self characteristics
dist_mapping <- dist_mapping %>%
  inner_join(parcel_info, by = c("ImportParcelID", "BuildingOrImprovementNumber", "UNIT_AND_NUMBER"))

# Merge in neighbor characteristics
dist_mapping <- dist_mapping %>%
  left_join(parcel_info %>% 
              rename(tocode_nbr = tocode, 
                     regime_nbr = regime,
                     combinedyear_nbr = combinedyear, 
                     PropertyLandUseStndCode_nbr = PropertyLandUseStndCode,
                     PropertyHouseNumber_nbr = PropertyHouseNumber, 
                     PropertyStreetName_nbr = PropertyStreetName),
    by = c("ImportParcelID_nbr" = "ImportParcelID", 
           "BuildingOrImprovementNumber_nbr" = "BuildingOrImprovementNumber", 
           "UNIT_AND_NUMBER")
  )

# Home filtering ----

# For each focal home, flag those that have potential issues with neighbors
parcel_flags <- dist_mapping %>%
  group_by(ImportParcelID, BuildingOrImprovementNumber, UNIT_AND_NUMBER) %>%
  summarize(
    tocode = first(tocode), regime = first(regime), PropertyLandUseStndCode = first(PropertyLandUseStndCode),
    any_same_address = any(ImportParcelID != ImportParcelID_nbr & 
                                (PropertyHouseNumber == PropertyHouseNumber_nbr & 
                                   PropertyStreetName == PropertyStreetName_nbr)),
    any_dist_zero = any(ImportParcelID != ImportParcelID_nbr & centroid_dist == 0),
    any_neighbor_badregime = any(!(regime_nbr %in% regimestoinclude)),
    any_neighbor_prohibtype = any(PropertyLandUseStndCode_nbr %in% hometypestodrop),
    any_neighbor_nacode = any(is.na(tocode_nbr)),
    no_wall_dist = all(is.na(wall_dist))) %>% # Don't include homes if they are missing all of their wall distances 
  mutate(
    include_in_neighbor_analysis = regime %in% regimestoinclude & !(PropertyLandUseStndCode %in% hometypestodrop) & !is.na(tocode) &
      !any_same_address & !any_dist_zero & !any_neighbor_badregime & !any_neighbor_prohibtype & !any_neighbor_nacode,
    include_in_wall_analysis = include_in_neighbor_analysis & !no_wall_dist
  ) 

# Add these flags to full neighbor mapping
dist_mapping2 <- dist_mapping %>% 
  left_join(parcel_flags %>% select(ImportParcelID, BuildingOrImprovementNumber, UNIT_AND_NUMBER, include_in_neighbor_analysis, include_in_wall_analysis),
            by = c("ImportParcelID", "BuildingOrImprovementNumber", "UNIT_AND_NUMBER"))

# Limit to matches we want to include
# Also remove self matches
neighbor_mapping <- dist_mapping2 %>% 
  filter(include_in_neighbor_analysis == TRUE, ImportParcelID != ImportParcelID_nbr)

# Create bins 
# TODO: fix these names


summary(neighbor_mapping)
  
### CHECKING OUT NEIGHBOR MAPPING
check <- neighbor_mapping %>% 
  filter(include_in_neighbor_analysis, wall_dist <= 10 & centroid_dist > 30) %>%
  transmute(ImportParcelID, ImportParcelID_nbr, UNIT_AND_NUMBER,
         addr = paste(PropertyHouseNumber, PropertyStreetName),
         addr_nbr = paste(PropertyHouseNumber_nbr, PropertyStreetName_nbr),
         centroid_dist, wall_dist, tocode, tocode_nbr, combinedyear, combinedyear_nbr)


# Get bins of centroid distances by to-code and no-code status
# We adjust all centroid distances by subtracting 20 m to make them comparable to wall to wall distances
dist_centroids <- neighbor_mapping %>%
  mutate(dbin = cut(oob_squish(centroid_dist - 20, c(1, 200)), breaks = binbreaks, labels = binleftends, right = FALSE)) %>% # Don't let distance get below 1 --- we bin 0-10 anyway
  filter(!is.na(dbin)) %>% # Will drop homes with NA centroid_dist or centroid_dist outside of breaks
  count(ImportParcelID, BuildingOrImprovementNumber, UNIT_AND_NUMBER, dbin, tocode_nbr) %>%
  pivot_wider(
    id_cols = c("ImportParcelID", "BuildingOrImprovementNumber", "UNIT_AND_NUMBER"),
    names_from = c("dbin", "tocode_nbr"),
    names_prefix = "Dcentroids",
    values_from = n,
    values_fill = 0,
    names_sort = TRUE
  )

# Count total neighbors
total_counts_centroids <- neighbor_mapping %>%
  group_by(ImportParcelID, BuildingOrImprovementNumber, UNIT_AND_NUMBER) %>%
  summarize(
    total_neighbors_in_30m_centroids = sum(centroid_dist <= 30),
    total_neighbors_in_100m_centroids = sum(centroid_dist <= 100),
    total_neighbors_in_200m_centroids = sum(centroid_dist <= 200)
  )




# Get bins of wall distances by to-code and no-code status
dist_walls <- neighbor_mapping %>%
  mutate(dbin = cut(wall_dist, breaks = binbreaks, labels = binleftends, right = FALSE)) %>%
  filter(!is.na(dbin)) %>% # Will drop homes with NA wall_dist or wall_dist outside of breaks
  count(ImportParcelID, BuildingOrImprovementNumber, UNIT_AND_NUMBER, dbin, tocode_nbr) %>%
  pivot_wider(
    id_cols = c("ImportParcelID", "BuildingOrImprovementNumber", "UNIT_AND_NUMBER"),
    names_from = c("dbin", "tocode_nbr"),
    names_prefix = "Dwalls",
    values_from = n,
    values_fill = 0,
    names_sort = TRUE
  )

# Merge and fill in zeros for homes with no neighbors
# We do this separately for centroids and walls since we don't want to mark homes that have no computed wall distances as zeroes
neighbor_data_centroids <- parcel_flags %>%
  filter(include_in_neighbor_analysis == TRUE) %>%
  select(ImportParcelID, BuildingOrImprovementNumber, UNIT_AND_NUMBER) %>%
  left_join(dist_centroids, by = c("ImportParcelID", "BuildingOrImprovementNumber", "UNIT_AND_NUMBER")) %>%
  left_join(total_counts_centroids, by = c("ImportParcelID", "BuildingOrImprovementNumber", "UNIT_AND_NUMBER"))

neighbor_data_centroids[is.na(neighbor_data_centroids)] <- 0

neighbor_data_walls <- parcel_flags %>%
  filter(include_in_wall_analysis == TRUE) %>% 
  select(ImportParcelID, BuildingOrImprovementNumber, UNIT_AND_NUMBER) %>%
  left_join(dist_walls, by = c("ImportParcelID", "BuildingOrImprovementNumber", "UNIT_AND_NUMBER")) 

neighbor_data_walls[is.na(neighbor_data_walls)] <- 0

# Combine
neighbor_data <- neighbor_data_centroids %>%
  left_join(neighbor_data_walls, by = c("ImportParcelID", "BuildingOrImprovementNumber", "UNIT_AND_NUMBER")) %>%
  ungroup() 


# Version using pre-post
neighbor_data <- neighbor_data %>%
  mutate(across(matches("^D[[:alpha:]]*[0-9]*_"), ~ .x > 0, .names = "any_{.col}")) %>% # Dummies of counts
  mutate(across(matches("^D[[:alpha:]]*[0-9]*_"), ~ ifelse(.x > 2, 2, .x), .names = "n_{.col}")) # Capped counts

# Save dataset for merging into main data
write_fst(neighbor_data, file.path(WORKING, "neighbor-vars.fst"))

